Contributions from iNaturalist national sites

Data analyses

Author

Florencia Grattarola

Published

October 2, 2025

The iNaturalist Network is a localised experience that is fully connected to the global iNaturalist community. Network members are local institutions that promote local use and facilitate the use of data from iNaturalist to benefit local

The aim of this report is to give an account of the importance of the iNaturalist network members by analysing the number of records for each country.

Code
library(httr)
library(jsonlite)
library(knitr)
library(rgbif)
library(ggrepel)
library(extrafont)
library(patchwork)
library(parallel)
library(doParallel)
library(hstats)
library(vip)
library(future)
library(tidymodels)
library(tidyverse)  

options(knitr.kable.NA = '') 
options(scipen=999)

iNaturalist National Sites

Code
iNat_network <- 
  tribble(~'site', ~'site_name', ~'site_id',
        'Global', 'iNaturalist', 1,
        'Mexico', 'iNaturalistMX', 2,
        'New Zealand', 'iNaturalistNZ', 3,
        'Canada', 'iNaturalist.ca', 5,
        'Colombia', 'NaturalistaCO', 6,
        'Portugal', 'BioDiversity4All', 8,
        'Australia', 'iNaturalistAU', 9,
        'Panama', 'iNaturalistPa', 13,
        'Ecuador', 'iNaturalistEc', 14,
        'Israel', 'iNaturalistil', 15,
        'Argentina', 'ArgentiNat', 16,
        'Costa Rica', 'NaturalistaCR', 17,
        'Chile', 'iNaturalistCL', 18,
        'Finland', 'iNaturalistFi', 20,
        'Sweeden', 'iNaturalist.Se', 21,
        'Spain', 'Natusfera', 22,
        'Greece', 'iNaturalistGR', 23,
        'Guatemala', 'iNaturalistGT', 24,
        'United Kingdom', 'iNaturalistUK', 25,
        'Luxembourg', 'iNaturalist.LU', 26,
        'Taiwan', 'iNaturalistTW', 27,
        'Uruguay', 'NaturalistaUY', 28)

iNat_network %>% 
  mutate('#'= row_number()) %>% relocate('#') %>% 
  rename(`Site` = site,
         `Name`=site_name,
         `ID`=site_id) %>% 
  kableExtra::kbl(digits=1, format.args = list(big.mark = ',')) %>% 
  kableExtra::kable_material('striped') %>% 
  kableExtra::row_spec(row = c(2,5,8,9,11,12,13,18,22), bold = T, color = "white", background = "#228A22")
Members of the iNaturalist Network. Shown in green are the sites from Latin America
# Site Name ID
1 Global iNaturalist 1
2 Mexico iNaturalistMX 2
3 New Zealand iNaturalistNZ 3
4 Canada iNaturalist.ca 5
5 Colombia NaturalistaCO 6
6 Portugal BioDiversity4All 8
7 Australia iNaturalistAU 9
8 Panama iNaturalistPa 13
9 Ecuador iNaturalistEc 14
10 Israel iNaturalistil 15
11 Argentina ArgentiNat 16
12 Costa Rica NaturalistaCR 17
13 Chile iNaturalistCL 18
14 Finland iNaturalistFi 20
15 Sweeden iNaturalist.Se 21
16 Spain Natusfera 22
17 Greece iNaturalistGR 23
18 Guatemala iNaturalistGT 24
19 United Kingdom iNaturalistUK 25
20 Luxembourg iNaturalist.LU 26
21 Taiwan iNaturalistTW 27
22 Uruguay NaturalistaUY 28

Methods

We tested different explanatory variables and saw which is the model that best explains the values a country has for iNaturalist. Indicators per country were extracted using the WDI: World Development Indicators (World Bank) on the 26th of September, 2025.

Response variables:

  • total number of records on iNaturalist per country: n_records_inat.
  • proportion of records from iNaturalist on GBIF per country (proxy for quality, research- grade records): p_gbif.
  • number of users recording in the country (this is not exactly the users from the country, but users that have generated records in the country): n_users.
  • number of taxa recorded in the country (this is not necessarily at the species level): n_taxa.

Explanatory variables:

  • population of the country: population (SP.POP.TOTL).
  • area of the country in km2: area (AG.SRF.TOTL.K2).
  • country’s centroid latitude (as a proxy of expected biodiversity): latitude (using rnaturalearth country polygons).
  • GDP per capita: gdp_per_capita (NY.GDP.PCAP.CD).
  • % of the GDP of the country dedicated to research: gdp_in_research GB.XPD.RSDV.GD.ZS).

Data extraction

Functions

Code
source('R/variables_per_country.R')

Data download

Code
UnitedStates <- tibble(country_name= 'United States')

UnitedStates <- UnitedStates %>% 
  mutate(country_code = countrycode::countrycode(country_name,
                                                 origin = 'country.name',
                                                 destination = 'iso2c'))

n_inat_gbif_country <- getGBIFrecordsPerCountry(UnitedStates$country_code)
n_inat_country <- getiNatRecordsPerCountry(UnitedStates$country_name)
n_users_country <- getiNatUsersPerCountry(UnitedStates$country_name)
n_taxa_country <- getiNatTaxaPerCountry(UnitedStates$country_name)

area_country <- getAreaPerCountry(UnitedStates$country_code)
population <- getPopulationPerCountry(UnitedStates$country_code)
gdp_per_capita <- getGDPperCapitaPerCountry(UnitedStates$country_code)
gdp_in_research <- getGDPinResearchPerCountry(UnitedStates$country_code)
latitude <- getLatitudePerCountry(UnitedStates$country_code)

data_variables_UnitedStates <- left_join(left_join(left_join(left_join(
  left_join(left_join(left_join(left_join(left_join(
  UnitedStates, n_inat_gbif_country),
  n_inat_country), 
  n_users_country),
  n_taxa_country),
  area_country), 
  population), 
  gdp_per_capita), 
  gdp_in_research), latitude)

saveRDS(data_variables_UnitedStates, 'data/UnitedStates_data_variables.rds')

########################################################################

America <- tibble(country_name= c('Canada', 'Mexico', 'Brazil', 'Costa Rica', 'Colombia', 'Peru', 'Argentina', 'Ecuador', 'Panama', 'Chile', 'Venezuela', 'Belize', 'Honduras', 'Bolivia', 'Guatemala', 'Cuba', 'Nicaragua', 'Paraguay', 'Bahamas', 'Jamaica', 'Trinidad and Tobago', 'Guyana', 'Dominican Republic', 'El Salvador', 'Suriname', 'Uruguay', 'Haiti'))

America <- America %>% 
  mutate(country_code = countrycode::countrycode(country_name,
                                                 origin = 'country.name',
                                                 destination = 'iso2c'))

America <- left_join(America, iNat_network %>% rename(country_name=site))

n_inat_gbif_country <- getGBIFrecordsPerCountry(America$country_code)
n_inat_country <- getiNatRecordsPerCountry(America$country_name)
n_users_country <- getiNatUsersPerCountry(America$country_name)
n_taxa_country <- getiNatTaxaPerCountry(America$country_name)

area_country <- getAreaPerCountry(America$country_code)
population <- getPopulationPerCountry(America$country_code)
gdp_per_capita <- getGDPperCapitaPerCountry(America$country_code)
gdp_in_research <- getGDPinResearchPerCountry(America$country_code)
latitude <- getLatitudePerCountry(America$country_code)

data_variables_America <- left_join(left_join(left_join(left_join(
  left_join(left_join(left_join(left_join(left_join(
  America, n_inat_gbif_country),
  n_inat_country), 
  n_users_country),
  n_taxa_country),
  area_country), 
  population), 
  gdp_per_capita), 
  gdp_in_research), latitude)

saveRDS(data_variables_America, 'data/America_data_variables.rds')

########################################################################

Europe <- tibble(country_name = c('Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czechia', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'United Kingdom', 'Iceland', 'Liechtenstein', 'Norway', 'Switzerland', 'Albania', 'Bosnia and Herzegovina', 'Georgia', 'Moldova', 'Montenegro', 'Macedonia', 'Serbia', 'Turkey', 'Ukraine'))

Europe <- Europe %>% 
  mutate(country_code = countrycode::countrycode(country_name,
                                                 origin = 'country.name',
                                                 destination = 'iso2c'))

Europe <- left_join(Europe, iNat_network %>% rename(country_name=site))

n_inat_gbif_country <- getGBIFrecordsPerCountry(Europe$country_code)
n_inat_country <- getiNatRecordsPerCountry(Europe$country_name)
n_users_country <- getiNatUsersPerCountry(Europe$country_name)
n_taxa_country <- getiNatTaxaPerCountry(Europe$country_name)

area_country <- getAreaPerCountry(Europe$country_code)
population <- getPopulationPerCountry(Europe$country_code)
gdp_per_capita <- getGDPperCapitaPerCountry(Europe$country_code)
gdp_in_research <- getGDPinResearchPerCountry(Europe$country_code)
latitude <- getLatitudePerCountry(Europe$country_code)

data_variables_Europe <- left_join(left_join(left_join(left_join(
  left_join(left_join(left_join(left_join(left_join(
  Europe, n_inat_gbif_country),
  n_inat_country), 
  n_users_country),
  n_taxa_country),
  area_country), 
  population), 
  gdp_per_capita), 
  gdp_in_research), latitude)

saveRDS(data_variables_Europe, 'data/Europe_data_variables.rds')

########################################################################

Asia <- tibble(country_name = c('India', 'China', 'Indonesia', 'Pakistan', 'Bangladesh', 'Japan', 'Philippines', 'Vietnam', 'Iran', 'Turkey', 'Thailand', 'Myanmar', 'South Korea','Iraq', 'Afghanistan', 'Yemen', 'Uzbekistan', 'Malaysia', 'Saudi Arabia', 'Nepal', 'North Korea','Syria', 'Sri Lanka','Kazakhstan', 'Cambodia', 'Jordan', 'United Arab Emirates', 'Tajikistan', 'Azerbaijan', 'Israel', 'Laos', 'Turkmenistan', 'Kyrgyzstan', 'Singapore', 'Lebanon', 'Palestine','Oman', 'Kuwait', 'Georgia', 'Mongolia', 'Qatar', 'Armenia', 'Bahrain', 'Timor Leste', 'Cyprus', 'Bhutan', 'Maldives', 'Brunei', 'Taiwan'))

Asia <- Asia %>% 
  mutate(country_code = countrycode::countrycode(country_name,
                                                 origin = 'country.name',
                                                 destination = 'iso2c'))

Asia <- left_join(Asia, iNat_network %>% rename(country_name=site))

n_inat_gbif_country <- getGBIFrecordsPerCountry(Asia$country_code)
n_inat_country <- getiNatRecordsPerCountry(Asia$country_name)
n_users_country <- getiNatUsersPerCountry(Asia$country_name)
n_taxa_country <- getiNatTaxaPerCountry(Asia$country_name)

area_country <- getAreaPerCountry(Asia$country_code)
population <- getPopulationPerCountry(Asia$country_code)
gdp_per_capita <- getGDPperCapitaPerCountry(Asia$country_code)
gdp_in_research <- getGDPinResearchPerCountry(Asia$country_code)
latitude <- getLatitudePerCountry(Asia$country_code)

data_variables_Asia <- left_join(left_join(left_join(left_join(
  left_join(left_join(left_join(left_join(left_join(
  Asia, n_inat_gbif_country),
  n_inat_country), 
  n_users_country),
  n_taxa_country),
  area_country), 
  population), 
  gdp_per_capita), 
  gdp_in_research), latitude)

data_variables_Asia <- data_variables_Asia %>% 
  mutate(area = ifelse(country_name == 'Taiwan', 36197, area),
         population = ifelse(country_name == 'Taiwan', 23365274, population))

saveRDS(data_variables_Asia, 'data/Asia_data_variables.rds')

########################################################################

Oceania <- tibble(country_name = c('Australia', 'Papua New Guinea', 'New Zealand', 'Fiji', 'Solomon Islands', 'Federated States of Micronesia', 'Vanuatu', 'Samoa', 'Kiribati', 'Tonga', 'Marshall Islands', 'Palau', 'Tuvalu', 'Nauru'))

Oceania <- Oceania %>% 
  mutate(country_code = countrycode::countrycode(country_name,
                                                 origin = 'country.name',
                                                 destination = 'iso2c'))

Oceania <- left_join(Oceania, iNat_network %>% rename(country_name=site))

n_inat_gbif_country <- getGBIFrecordsPerCountry(Oceania$country_code)
n_inat_country <- getiNatRecordsPerCountry(Oceania$country_name)
n_users_country <- getiNatUsersPerCountry(Oceania$country_name)
n_taxa_country <- getiNatTaxaPerCountry(Oceania$country_name)

area_country <- getAreaPerCountry(Oceania$country_code)
population <- getPopulationPerCountry(Oceania$country_code)
gdp_per_capita <- getGDPperCapitaPerCountry(Oceania$country_code)
gdp_in_research <- getGDPinResearchPerCountry(Oceania$country_code)
latitude <- getLatitudePerCountry(Oceania$country_code)

data_variables_Oceania <- left_join(left_join(left_join(left_join(
  left_join(left_join(left_join(left_join(left_join(
  Oceania, n_inat_gbif_country),
  n_inat_country), 
  n_users_country),
  n_taxa_country),
  area_country), 
  population), 
  gdp_per_capita), 
  gdp_in_research), latitude)

saveRDS(data_variables_Oceania, 'data/Oceania_data_variables.rds')

########################################################################

variables_global <- bind_rows(data_variables_UnitedStates %>% 
                                mutate(continent = 'America'),
                              data_variables_America %>% 
                                mutate(continent = 'America'),
                              data_variables_Europe %>% 
                                mutate(continent = 'Europe'),
                              data_variables_Asia %>% 
                                mutate(continent = 'Asia'),
                              data_variables_Oceania %>% 
                                mutate(continent = 'Oceania')) %>% 
  unique()

# variables_global <- bind_rows(readRDS('data/UnitedStates_data_variables.rds') %>%
#                                 mutate(continent = 'America'),
#   readRDS('data/America_data_variables.rds') %>%
#                                 mutate(continent = 'America'),
#           readRDS('data/Europe_data_variables.rds') %>%
#                                 mutate(continent = 'Europe'),
#           readRDS('data/Asia_data_variables.rds') %>%
#                                 mutate(continent = 'Asia'),
#           readRDS('data/Oceania_data_variables.rds') %>%
#                                 mutate(continent = 'Oceania'))  %>%
#   unique()

saveRDS(variables_global, 'data/Global_data_variables.rds')

Summary of the data

Summary of the data for the Network members

Total number of records on iNaturalist per country

Code
ids <- data_variables %>% 
  select(country_name,site_name, n_records_inat) %>% 
  arrange(desc(n_records_inat)) %>% 
  with(which(!is.na(site_name)))

data_variables %>% 
  select(country_name,site_name, n_records_inat) %>% 
  arrange(desc(n_records_inat)) %>% 
  mutate('#'= row_number()) %>% relocate('#') %>% 
  rename(`Country` = country_name,
         `iNat site` = site_name,
         `Records on iNat`=n_records_inat) %>% 
  kableExtra::kbl(digits=4, format.args = list(big.mark = ',')) %>% 
  kableExtra::kable_material('striped') %>% 
  kableExtra::row_spec(ids, bold = T, color = "white", background = "#228A22") %>% 
  kableExtra::scroll_box(height = '600px')
# Country iNat site Records on iNat
1 United States 117,967,387
2 Canada iNaturalist.ca 20,889,613
3 Australia iNaturalistAU 11,976,616
4 Mexico iNaturalistMX 8,975,015
5 United Kingdom iNaturalistUK 8,141,711
6 Germany 6,081,430
7 France 5,749,959
8 Spain Natusfera 5,162,544
9 Italy 4,147,436
10 Taiwan iNaturalistTW 4,052,959
11 India 4,013,726
12 Brazil 3,829,606
13 New Zealand iNaturalistNZ 3,148,237
14 Austria 2,935,237
15 Portugal BioDiversity4All 2,332,457
16 China 2,042,227
17 Colombia NaturalistaCO 2,040,943
18 Ecuador iNaturalistEc 2,024,022
19 Argentina ArgentiNat 1,926,013
20 Ukraine 1,875,798
21 Denmark 1,854,729
22 Costa Rica NaturalistaCR 1,764,500
23 Poland 1,584,642
24 Czechia 1,487,638
25 Finland iNaturalistFi 1,424,762
26 Japan 1,156,139
27 Thailand 1,108,651
28 Malaysia 1,092,232
29 Netherlands 1,083,404
30 Indonesia 1,071,014
31 Switzerland 1,069,803
32 Chile iNaturalistCL 947,995
33 Peru 870,745
34 Bolivia 838,793
35 Singapore 824,322
36 Sweden 814,504
37 Greece iNaturalistGR 798,341
38 Panama iNaturalistPa 754,561
39 Belgium 726,270
40 South Korea 611,801
41 Hungary 584,655
42 Lithuania 526,786
43 Philippines 514,868
44 Croatia 511,726
45 Norway 437,397
46 Turkey 426,156
47 Turkey 426,156
48 Luxembourg iNaturalist.LU 416,368
49 Israel iNaturalistil 385,310
50 Honduras 357,290
51 Ireland 329,796
52 Romania 320,947
53 Slovakia 285,607
54 Kazakhstan 252,847
55 Slovenia 244,685
56 Sri Lanka 226,260
57 Vietnam 221,263
58 Bulgaria 198,429
59 Georgia 184,488
60 Georgia 184,488
61 Guatemala iNaturalistGT 181,281
62 Belize 169,354
63 Uruguay NaturalistaUY 164,832
64 Serbia 158,850
65 Dominican Republic 154,161
66 Iceland 148,739
67 Trinidad and Tobago 140,844
68 Nicaragua 131,036
69 Nepal 130,339
70 Mongolia 115,092
71 Venezuela 103,603
72 El Salvador 98,296
73 Cuba 91,235
74 Estonia 89,338
75 Cambodia 89,117
76 Latvia 88,631
77 Albania 82,935
78 Armenia 78,661
79 Jamaica 78,158
80 Cyprus 77,430
81 Cyprus 77,430
82 United Arab Emirates 76,021
83 Fiji 74,285
84 Montenegro 73,202
85 Maldives 69,967
86 Iran 67,337
87 Bahamas 64,650
88 Uzbekistan 53,515
89 Bosnia and Herzegovina 52,898
90 Bhutan 52,621
91 Kyrgyzstan 47,691
92 Pakistan 42,750
93 Paraguay 41,175
94 Papua New Guinea 40,484
95 Bangladesh 40,082
96 Myanmar 34,611
97 Palestine 34,110
98 Saudi Arabia 32,759
99 Oman 32,143
100 Suriname 31,834
101 Guyana 30,510
102 Malta 30,143
103 Macedonia 29,846
104 Kuwait 29,436
105 Laos 28,762
106 Vanuatu 25,780
107 Syria 25,686
108 Marshall Islands 24,770
109 Jordan 22,791
110 Azerbaijan 21,364
111 Palau 21,237
112 Haiti 17,040
113 Moldova 16,802
114 Iraq 15,958
115 Solomon Islands 15,829
116 Lebanon 15,320
117 Brunei 13,443
118 Tajikistan 12,286
119 Qatar 11,253
120 Yemen 9,948
121 Tonga 8,488
122 Federated States of Micronesia 6,372
123 Samoa 5,917
124 Liechtenstein 5,667
125 Bahrain 3,656
126 Tuvalu 2,968
127 Kiribati 2,585
128 North Korea 2,143
129 Afghanistan 1,450
130 Turkmenistan 662
131 Nauru 103
132 Timor Leste

Number of records from iNaturalist on GBIF per country

Code
ids <- data_variables %>% 
  select(country_name, site_name, n_records_gbif, n_records_gbif_inat) %>% 
  mutate(p_gbif=n_records_gbif_inat*100/n_records_gbif) %>% 
  arrange(desc(p_gbif)) %>% 
  with(which(!is.na(site_name)))

data_variables %>% 
  select(country_name, site_name, n_records_gbif, n_records_gbif_inat) %>% 
  mutate(p_gbif=n_records_gbif_inat*100/n_records_gbif) %>% 
  arrange(desc(p_gbif)) %>% 
  mutate('#'= row_number()) %>% relocate('#') %>% 
  select(-n_records_gbif_inat) %>% 
  rename(`Country` = country_name,
         `iNat site` = site_name,
         `Records from iNat on GBIF`=n_records_gbif,
         `Proportion`=p_gbif) %>% 
  kableExtra::kbl(digits=4, format.args = list(big.mark = ',')) %>% 
  kableExtra::kable_material('striped') %>% 
  kableExtra::row_spec(ids, bold = T, color = "white", background = "#228A22") %>% 
  kableExtra::scroll_box(height = '600px')
# Country iNat site Records from iNat on GBIF Proportion
1 Maldives 108,822 28.6808
2 Ukraine 3,931,831 27.5579
3 Albania 144,430 22.7903
4 Italy 8,596,699 21.9913
5 Kazakhstan 539,075 21.7589
6 Singapore 1,974,945 20.2467
7 Croatia 1,274,969 19.0917
8 Tuvalu 9,776 18.9546
9 Montenegro 174,675 18.8745
10 Bosnia and Herzegovina 98,561 17.4674
11 Malta 99,495 16.9767
12 Lithuania 1,479,823 16.4623
13 Uzbekistan 148,530 15.3498
14 Fiji 307,778 14.7860
15 Hungary 2,226,224 13.2507
16 Marshall Islands 120,862 13.0322
17 Czechia 5,318,179 11.9122
18 Slovenia 921,600 11.8601
19 New Zealand iNaturalistNZ 16,738,367 11.7287
20 Indonesia 3,461,778 11.7265
21 Austria 15,651,302 11.1046
22 Timor Leste 97,706 11.0812
23 Greece iNaturalistGR 4,019,188 10.3777
24 Mexico iNaturalistMX 35,031,471 9.9170
25 Armenia 242,972 9.8859
26 Romania 1,476,087 9.8087
27 Iraq 76,252 9.5447
28 Mongolia 682,658 8.5092
29 Malaysia 3,807,247 8.0152
30 Slovakia 1,921,795 7.7925
31 Cyprus 647,634 7.1392
32 Cyprus 647,634 7.1392
33 Kyrgyzstan 253,585 6.9630
34 Brunei 56,851 6.8301
35 Luxembourg iNaturalist.LU 3,410,536 6.6802
36 Taiwan iNaturalistTW 24,746,071 6.6399
37 Vietnam 976,607 6.6174
38 Dominican Republic 929,637 6.5430
39 Argentina ArgentiNat 17,173,754 6.5071
40 Macedonia 169,132 6.5008
41 Vanuatu 174,557 6.3601
42 Bahrain 27,185 6.2203
43 Philippines 2,461,357 5.7149
44 Jordan 169,041 5.6986
45 Japan 9,498,086 5.4946
46 Solomon Islands 195,781 5.3274
47 Thailand 7,522,142 5.2464
48 China 10,802,967 5.1972
49 Palau 216,168 5.1585
50 Portugal BioDiversity4All 21,577,985 5.1444
51 Canada iNaturalist.ca 208,431,531 4.9375
52 Georgia 1,331,321 4.8151
53 Georgia 1,331,321 4.8151
54 Yemen 132,460 4.7743
55 Moldova 131,574 4.7418
56 Samoa 52,369 4.7280
57 United States 1,204,974,260 4.4846
58 Serbia 1,587,635 4.4378
59 Uruguay NaturalistaUY 2,015,742 4.4319
60 Poland 16,895,227 4.3893
61 Germany 72,867,701 4.3444
62 Trinidad and Tobago 1,301,259 4.2251
63 Myanmar 355,497 4.1578
64 Ecuador iNaturalistEc 13,906,504 4.0896
65 Turkey 4,032,089 4.0353
66 Turkey 4,032,089 4.0353
67 Sri Lanka 2,686,470 4.0267
68 Bulgaria 2,740,526 3.9647
69 Bolivia 2,492,127 3.9480
70 Haiti 196,871 3.8284
71 South Korea 5,804,212 3.7333
72 Jamaica 774,333 3.6586
73 Latvia 893,275 3.5520
74 Azerbaijan 261,891 3.4881
75 Brazil 31,933,725 3.4878
76 Kuwait 344,029 3.4494
77 Iran 950,287 3.3573
78 Laos 275,575 3.3534
79 Australia iNaturalistAU 185,062,276 3.3334
80 Qatar 163,536 3.2482
81 Honduras 3,804,522 3.1315
82 Chile iNaturalistCL 12,329,845 3.1303
83 Tonga 136,819 3.1297
84 Tajikistan 95,908 3.1259
85 Spain Natusfera 80,295,996 3.0978
86 North Korea 48,412 3.0178
87 Syria 317,170 2.9924
88 Bahamas 1,099,589 2.9905
89 Iceland 2,535,418 2.9310
90 Palestine 625,030 2.8528
91 Liechtenstein 97,620 2.6695
92 Suriname 517,131 2.6444
93 Cambodia 1,151,596 2.6385
94 Panama iNaturalistPa 9,429,490 2.6022
95 Lebanon 188,865 2.5997
96 Nicaragua 2,282,549 2.5558
97 Cuba 2,123,735 2.5539
98 Saudi Arabia 596,198 2.5173
99 El Salvador 1,424,122 2.4690
100 United Kingdom iNaturalistUK 197,099,118 2.2427
101 Peru 10,594,413 2.2321
102 Bangladesh 760,745 2.1922
103 Nepal 1,718,650 2.1773
104 Ireland 6,772,745 2.0821
105 Oman 679,607 2.0247
106 Israel iNaturalistil 8,032,039 1.9720
107 Federated States of Micronesia 142,767 1.9528
108 Bhutan 733,994 1.9224
109 India 63,405,406 1.8926
110 Switzerland 30,787,704 1.7614
111 Costa Rica NaturalistaCR 36,438,909 1.7524
112 United Arab Emirates 1,962,937 1.6571
113 Pakistan 642,528 1.6158
114 France 197,318,563 1.3854
115 Colombia NaturalistaCO 37,891,906 1.3592
116 Finland iNaturalistFi 49,609,714 1.1837
117 Guyana 974,642 1.1344
118 Guatemala iNaturalistGT 5,309,889 1.1294
119 Kiribati 164,863 1.1167
120 Denmark 64,142,234 1.1047
121 Turkmenistan 24,320 1.0938
122 Belize 7,607,205 1.0104
123 Papua New Guinea 1,823,607 0.9176
124 Paraguay 1,550,251 0.8068
125 Belgium 41,453,454 0.8050
126 Nauru 4,889 0.7773
127 Venezuela 5,110,477 0.7244
128 Afghanistan 69,158 0.6651
129 Estonia 8,660,767 0.5359
130 Netherlands 131,925,688 0.3926
131 Norway 57,721,199 0.3669
132 Sweden 148,209,998 0.2440

Number of users recording in the country

Code
ids <- data_variables %>% 
  select(country_name, site_name, n_users) %>% 
  arrange(desc(n_users)) %>% 
  with(which(!is.na(site_name)))

data_variables %>% 
  select(country_name, site_name, n_users) %>% 
  arrange(desc(n_users)) %>%
  mutate('#'= row_number()) %>% relocate('#') %>% 
  rename(`Country` = country_name,
         `iNat site` = site_name,
         `Users recording on iNat`=n_users) %>% 
  kableExtra::kbl(digits=4, format.args = list(big.mark = ',')) %>% 
  kableExtra::kable_material('striped') %>% 
  kableExtra::row_spec(ids, bold = T, color = "white", background = "#228A22") %>% 
  kableExtra::scroll_box(height = '600px')
# Country iNat site Users recording on iNat
1 United States 1,934,413
2 Canada iNaturalist.ca 271,433
3 Mexico iNaturalistMX 176,834
4 United Kingdom iNaturalistUK 157,683
5 France 138,493
6 Australia iNaturalistAU 125,027
7 Italy 94,972
8 Germany 89,923
9 Spain Natusfera 82,474
10 Denmark 77,749
11 Brazil 74,949
12 Taiwan iNaturalistTW 61,961
13 Colombia NaturalistaCO 58,511
14 India 51,941
15 New Zealand iNaturalistNZ 48,690
16 Portugal BioDiversity4All 39,586
17 Ecuador iNaturalistEc 38,941
18 Costa Rica NaturalistaCR 38,826
19 Czechia 36,676
20 Austria 32,866
21 Netherlands 32,226
22 Finland iNaturalistFi 31,699
23 Bolivia 28,519
24 Belgium 26,335
25 Japan 25,935
26 Argentina ArgentiNat 25,924
27 Thailand 24,844
28 Switzerland 24,368
29 Indonesia 24,235
30 Chile iNaturalistCL 24,136
31 Greece iNaturalistGR 23,869
32 Sweden 21,803
33 Poland 19,909
34 China 19,896
35 Malaysia 19,822
36 Peru 18,321
37 Panama iNaturalistPa 18,283
38 Philippines 16,941
39 Turkey 15,141
40 Turkey 15,141
41 Croatia 14,978
42 Ireland 14,816
43 Norway 14,728
44 Ukraine 13,752
45 Singapore 10,948
46 Lithuania 9,841
47 Honduras 9,453
48 South Korea 8,963
49 Guatemala iNaturalistGT 8,398
50 Hungary 8,120
51 Iceland 7,696
52 Slovenia 7,537
53 Luxembourg iNaturalist.LU 7,319
54 Romania 6,973
55 Vietnam 6,760
56 Israel iNaturalistil 6,646
57 Dominican Republic 6,551
58 Slovakia 6,426
59 Belize 4,976
60 Sri Lanka 4,706
61 Bahamas 4,523
62 United Arab Emirates 4,331
63 Uruguay NaturalistaUY 4,196
64 Georgia 4,023
65 Georgia 4,023
66 Bulgaria 3,947
67 Kazakhstan 3,568
68 Nepal 3,476
69 Jamaica 3,341
70 Nicaragua 3,177
71 El Salvador 3,112
72 Estonia 3,111
73 Cuba 2,860
74 Serbia 2,728
75 Cyprus 2,690
76 Cyprus 2,690
77 Latvia 2,622
78 Cambodia 2,576
79 Montenegro 2,503
80 Venezuela 2,481
81 Trinidad and Tobago 2,460
82 Albania 2,220
83 Fiji 1,879
84 Malta 1,869
85 Maldives 1,705
86 Bosnia and Herzegovina 1,691
87 Pakistan 1,664
88 Bhutan 1,566
89 Armenia 1,563
90 Iran 1,499
91 Mongolia 1,472
92 Saudi Arabia 1,421
93 Paraguay 1,394
94 Laos 1,348
95 Myanmar 1,325
96 Jordan 1,317
97 Macedonia 1,220
98 Palestine 1,133
99 Kyrgyzstan 1,132
100 Oman 1,107
101 Uzbekistan 1,097
102 Bangladesh 1,044
103 Azerbaijan 815
104 Lebanon 748
105 Papua New Guinea 748
106 Guyana 653
107 Vanuatu 536
108 Suriname 525
109 Qatar 503
110 Brunei 482
111 Moldova 457
112 Haiti 450
113 Liechtenstein 448
114 Palau 448
115 Iraq 401
116 Federated States of Micronesia 367
117 Tajikistan 339
118 Kuwait 339
119 Bahrain 299
120 Solomon Islands 291
121 Samoa 237
122 Tonga 197
123 Syria 186
124 Yemen 145
125 Afghanistan 136
126 Marshall Islands 93
127 North Korea 92
128 Turkmenistan 91
129 Kiribati 43
130 Tuvalu 31
131 Nauru 11
132 Timor Leste

Associations between variables

Population of the country

Code
## records
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(population/100000, n_records_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Population of the country (hundred thousand)',
       y='Number of records on iNaturalist (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of records on iNaturalist vs Population of the country
Code
## records in GBIF
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(population/100000, n_records_gbif_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Population of the country (hundred thousand)',
       y='Number of iNat records on GBIF (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of iNat records on GBIF vs Population of the country
Code
## users
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(population/100000, n_users, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', col= 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Population of the country (hundred thousand)',
       y='Number of users recording on iNaturalist') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of users recording on iNaturalist vs Population of the country

Area of the country in km2

Code
## records
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(area/1000,n_records_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Area of the country (thousand km2)',
       y='Number of records on iNaturalist (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of records on iNaturalist vs Area of the country
Code
## records in gbif
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(area/1000, n_records_gbif_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Area of the country (thousand km2)',
       y='Number of iNat records on GBIF (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of iNat records on GBIF vs Area of the country
Code
## users
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(area/1000, n_users, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Area of the country (thousand km2)',
       y='Number of users recording on iNaturalist') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of users recording on iNaturalist vs Area of the country

Country’s centroid latitude

Code
## records
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(abs(latitude), n_records_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Absolute decimal latitude of the country\'s centroid',
       y='Number of records on iNaturalist (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of records on iNaturalist vs country’s centroid latitude
Code
## records in gbif
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(abs(latitude), n_records_gbif_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Absolute decimal latitude of the country\'s centroid',
       y='Number of iNat records on GBIF (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of iNat records on GBIF vs country’s centroid latitude
Code
## users
ggplot(data_variables %>% 
         mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(abs(latitude), n_users, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='Absolute decimal latitude of the country\'s centroid',
       y='Number of users recording on iNaturalist') +
  #scale_x_log10() + 
  scale_y_log10() +
  theme_bw()

Number of users recording on iNaturalist vs country’s centroid latitude

GDP per capita

Code
## records
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(gdp_per_capita/1000, n_records_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='GDP per capita (thousand USD)',
       y='Number of records on iNaturalist (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of records on iNaturalist vs country’s GDP per capita
Code
## records in gbif
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(gdp_per_capita/1000, n_records_gbif_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='GDP per capita (thousand USD)',
       y='Number of iNat records on GBIF (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of iNat records on GBIF vs country’s GDP per capita
Code
## users
ggplot(data_variables %>% 
         mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(gdp_per_capita/1000, n_users, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='GDP per capita (thousand USD)',
       y='Number of users recording on iNaturalist') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of users recording on iNaturalist vs GDP per capita

% of the GDP of the country dedicated to research

Code
## records
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(gdp_in_research, n_records_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='GDP of the country dedicated to research (%)',
       y='Number of records on iNaturalist (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of records on iNaturalist vs % of the country’s GDP dedicated to research
Code
## records in gbif
ggplot(data_variables %>% mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(gdp_in_research, n_records_gbif_inat/1000, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='GDP of the country dedicated to research (%)',
       y='Number of iNat records on GBIF (thousand)') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of iNat records on GBIF vs % of the country’s GDP dedicated to research
Code
## users
ggplot(data_variables %>% 
         mutate(site_on_iNat = ifelse(!is.na(site_id), 'yes', 'no')), 
       aes(gdp_in_research,n_users, label = site_name)) +
  geom_point(aes(col=site_on_iNat), size=2, show.legend = F) +
  scale_color_manual(values = c('black', '#74AC00')) +
  geom_smooth(method='lm', colour = 'black') +
  geom_label_repel(aes(fill=site_on_iNat), 
                   colour = "black", #fontface = "bold",
                   segment.color = 'black',
                   show.legend = F, max.overlaps= Inf) +
  scale_fill_manual(values = c('#F7F7F7', '#74AC00')) +
  labs(x='GDP of the country dedicated to research (%)',
       y='Number of users recording on iNaturalist') +
  scale_x_log10() + scale_y_log10() +
  theme_bw()

Number of users recording on iNaturalist vs % of the country’s GDP dedicated to research

Modelling

Code
data_regressions <- data_variables %>% 
  filter(country_code != 'US') %>% 
  mutate(has_site = ifelse(!is.na(site_name), 1, 0)) %>% 
  mutate(p_gbif = n_records_gbif_inat/n_records_inat) %>% 
  select(country_code, n_records_inat, p_gbif, n_users, n_taxa,
         area, gdp_per_capita, gdp_in_research, 
         population, latitude, has_site) %>% 
  filter(!is.na(gdp_in_research) & !is.na(latitude)) 

###

data_regressions %>% 
  select(area, gdp_per_capita, gdp_in_research, 
         population, latitude, has_site) %>% 
  cor() %>% 
  kableExtra::kbl(digits=3, format.args = list(big.mark = ',')) %>% 
  kableExtra::kable_material('striped') 
area gdp_per_capita gdp_in_research population latitude has_site
area 1.000 0.020 0.072 0.423 -0.106 0.196
gdp_per_capita 0.020 1.000 0.585 -0.143 0.355 0.185
gdp_in_research 0.072 0.585 1.000 0.068 0.359 0.126
population 0.423 -0.143 0.068 1.000 -0.037 -0.078
latitude -0.106 0.355 0.359 -0.037 1.000 -0.308
has_site 0.196 0.185 0.126 -0.078 -0.308 1.000

Random forest

Functions

Code
source('R/random_forest_iNat.R')

Total number of records on iNaturalist per country

Code
rf_n_records <- model_iNat_RF(data_df = data_regressions, 
              explanatory_variable = 'n_records_inat', 
              skip_vars = c('p_gbif', 
                            'n_users',
                            'n_taxa'))
Time difference of 1.476133 secs
Code
# tuning
rf_n_records[[1]]

Code
# observed vs predicted
rf_n_records[[2]]

Code
# variable importance
rf_n_records[[3]]

Code
ggsave(rf_n_records[[3]], filename='figs/variable_importance_n_records.png', device = 'png', dpi = 300, width = 8, height = 5)

# partial plots
rf_n_records[[4]]

Code
ggsave(rf_n_records[[4]], filename='figs/partial_plots_n_records.png', device = 'png', dpi = 300, width = 8, height = 5)

# final model metrics
rf_n_records[[5]] %>% 
  select(dataset, rsq=.estimate) %>% 
  mutate(rsq = round(rsq, 3)) %>% 
  rename(`R^2` = rsq) %>% 
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
dataset R^2
train 0.954
test 0.786
Code
# variable importance
rf_n_records[[6]] %>% 
  select(Variable, Importance=standardise_Importance) %>% 
  mutate(Importance = round(Importance, 3)) %>% 
  rename(`Portion of R^2` = Importance) %>% 
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
Variable Portion of R^2
area 0.192
population 0.135
latitude 0.131
gdp_per_capita 0.120
gdp_in_research 0.117
has_site 0.091

Proportion of records from iNaturalist on GBIF per country

Code
rf_p_gbif <- model_iNat_RF(data_df = data_regressions, 
              explanatory_variable = 'p_gbif', 
              skip_vars = c('n_records_inat', 
                            'n_users',
                            'n_taxa'))
Time difference of 1.101691 secs
Code
# tuning
rf_p_gbif[[1]]

Code
# observed vs predicted
rf_p_gbif[[2]]

Code
# variable importance
rf_p_gbif[[3]]

Code
ggsave(rf_p_gbif[[3]], filename='figs/variable_importance_p_gbif.png', device = 'png', dpi = 300, width = 8, height = 5)

# partial plots
rf_p_gbif[[4]]

Code
ggsave(rf_p_gbif[[4]], filename='figs/partial_plots_p_gbif.png', device = 'png', dpi = 300, width = 8, height = 5)

# final model metrics
rf_p_gbif[[5]] %>% 
  select(dataset, rsq=.estimate) %>% 
  mutate(rsq = round(rsq, 3)) %>% 
  rename(`R^2` = rsq) %>% 
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
dataset R^2
train 0.909
test 0.219
Code
# variable importance
rf_p_gbif[[6]] %>% 
  select(Variable, Importance=standardise_Importance) %>% 
  mutate(Importance = round(Importance, 3)) %>% 
  rename(`Portion of R^2` = Importance) %>% 
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
Variable Portion of R^2
gdp_per_capita 0.058
gdp_in_research 0.047
latitude 0.042
population 0.035
area 0.032
has_site 0.006

Number of users recording in the country

Code
rf_n_users <- model_iNat_RF(data_df = data_regressions,
                            explanatory_variable = 'n_users',
                            skip_vars = c('n_records_inat',
                                          'p_gbif',
                                          'n_taxa'))
Time difference of 1.114389 secs
Code
# tuning
rf_n_users[[1]]

Code
# observed vs predicted
rf_n_users[[2]]

Code
# variable importance
rf_n_users[[3]]

Code
ggsave(rf_n_users[[3]], filename='figs/variable_importance_n_users.png', device = 'png', dpi = 300, width = 8, height = 5)

# partial plots
rf_n_users[[4]]

Code
ggsave(rf_n_users[[4]], filename='figs/partial_plots_n_users.png', device = 'png', dpi = 300, width = 8, height = 5)

# final model metrics
rf_n_users[[5]] %>% 
  select(dataset, rsq=.estimate) %>% 
  mutate(rsq = round(rsq, 3)) %>% 
  rename(`R^2` = rsq) %>% 
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
dataset R^2
train 0.949
test 0.521
Code
# variable importance
rf_n_users[[6]] %>% 
  select(Variable, Importance=standardise_Importance) %>% 
  mutate(Importance = round(Importance, 3)) %>% 
  rename(`Portion of R^2` = Importance) %>% 
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
Variable Portion of R^2
population 0.107
area 0.105
gdp_in_research 0.087
gdp_per_capita 0.082
latitude 0.075
has_site 0.065

Number of taxa recorded in the country

Code
rf_n_taxa <- model_iNat_RF(data_df = data_regressions,
                           explanatory_variable = 'n_taxa',
                           skip_vars = c('n_records_inat',
                                         'p_gbif',
                                         'n_users'))
Time difference of 1.156683 secs
Code
# tuning
rf_n_taxa[[1]]

Code
# observed vs predicted
rf_n_taxa[[2]]

Code
# variable importance
rf_n_taxa[[3]]

Code
ggsave(rf_n_taxa[[3]], filename='figs/variable_importance_n_taxa.png', device = 'png', dpi = 300, width = 8, height = 5)

# partial plots
rf_n_taxa[[4]]

Code
ggsave(rf_n_taxa[[4]], filename='figs/partial_plots_n_taxa.png', device = 'png', dpi = 300, width = 8, height = 5)

# final model metrics
rf_n_taxa[[5]] %>% 
  select(dataset, rsq=.estimate) %>% 
  mutate(rsq = round(rsq, 3)) %>% 
  rename(`R^2` = rsq) %>% 
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
dataset R^2
train 0.937
test 0.782
Code
# variable importance
rf_n_taxa[[6]] %>% 
  select(Variable, Importance=standardise_Importance) %>% 
  mutate(Importance = round(Importance, 3)) %>% 
  rename(`Portion of R^2` = Importance) %>% 
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
Variable Portion of R^2
population 0.185
area 0.160
gdp_in_research 0.122
latitude 0.119
has_site 0.100
gdp_per_capita 0.096